#library(nCov2019)
library(leaflet)
library(dplyr)
library(ggplot2)
library(plotly)
library(scales)
library(xts)
library(dygraphs)
library(corrplot)
# Load the two COVID-19 case tables from CSV files in the working directory.
COVID <- read.csv(file = "covid_19_data.csv")
COVID_2 <- read.csv(file = "COVID19_5-Apr.csv")
Format date:
# Parse the raw Date column (month/day/two-digit year) into Date objects and
# keep the parsed values in a separate column, Date2.
Date <- as.Date(COVID_2$Date, format = "%m/%d/%y")
COVID_2$Date2 <- Date

# Snapshot of the data: only the rows carrying the most recent date.
COVID_updated <- COVID_2 %>%
  filter(Date2 == max(Date2))

# World map on dark tiles with one circle per row of the snapshot.
leaflet(width = "100%") %>%
  addProviderTiles("CartoDB.DarkMatter") %>%
  setView(lng = 0, lat = 10, zoom = 1.5) %>%
  addCircleMarkers(
    data = COVID_updated,
    lng = ~Long,
    lat = ~Lat,
    # Log scale keeps the largest counts from swamping the map; the +1 keeps
    # rows with zero confirmed cases at radius 0 instead of -Inf.
    radius = ~log(Confirmed + 1),
    color = rgb(218 / 255, 65 / 255, 56 / 255),
    # Rows without confirmed cases are drawn fully transparent.
    fillOpacity = ~ifelse(Confirmed > 0, 1, 0),
    stroke = FALSE,
    # Hover label "Province, Country: N". paste0() is used so that no extra
    # separator spaces are inserted around the punctuation, and the province
    # part is skipped when it is missing or blank (presumably the case for
    # rows reported at country level -- verify against the CSV) so the label
    # does not start with a dangling comma.
    label = ~ifelse(
      is.na(Province.State) | Province.State == "",
      paste0(Country.Region, ": ", Confirmed),
      paste0(Province.State, ", ", Country.Region, ": ", Confirmed)
    )
  )
Current top 10 countries:
# Confirmed cases per country on the most recent date, reduced to the ten
# largest totals and sorted in descending order.
COVID_top <- COVID_2 %>%
  filter(Date2 == max(Date2)) %>%
  group_by(Country.Region) %>%
  summarise(Total_confirmed = sum(Confirmed)) %>%
  top_n(10, Total_confirmed) %>%
  arrange(desc(Total_confirmed))

# Horizontal bar chart of those countries, largest total at the top.
plot <- ggplot(
  data = COVID_top,
  aes(x = Total_confirmed, y = reorder(Country.Region, Total_confirmed))
) +
  geom_bar(stat = "identity", alpha = 0.8, fill = "firebrick3") +
  geom_text(
    aes(label = Total_confirmed),
    vjust = 0.5, hjust = 0.9, color = "black", size = 3.5
  ) +
  scale_x_continuous(labels = comma) +
  labs(
    # paste() already separates its arguments with a single space, so the
    # literal must not end in one (that produced "as of  <date>").
    title = paste(
      "Top 10 countries with confirmed cases as of",
      max(COVID_2$Date2)
    ),
    x = "Confirmed cases",
    y = "Country"
  ) +
  theme_minimal()

# Interactive version; the tooltip shows only the x value (confirmed cases).
ggplotly(plot, tooltip = c("x"), width = 750)
Time distribution:
# Worldwide totals per date: summed confirmed cases, deaths and recoveries.
COVID_2_Day <- COVID_2 %>%
  group_by(Date2) %>%
  summarise(
    World_confirmed = sum(Confirmed),
    World_deaths = sum(Deaths),
    World_recovered = sum(Recovered)
  )

# One date-indexed xts series per measure.
COVID_Day_confirmed_series <- with(
  COVID_2_Day,
  xts(World_confirmed, order.by = Date2)
)
COVID_Day_deaths_series <- with(
  COVID_2_Day,
  xts(World_deaths, order.by = Date2)
)
COVID_Day_recovered_series <- with(
  COVID_2_Day,
  xts(World_recovered, order.by = Date2)
)

# Combine the three series; the dySeries() calls below address each column by
# the name of the variable passed to cbind(), so those names must not change.
Day_summary <- cbind(
  COVID_Day_confirmed_series,
  COVID_Day_deaths_series,
  COVID_Day_recovered_series
)

# Interactive time-series chart with a range selector underneath.
dygraph(
  Day_summary,
  main = "SARS-COV2-outbreak: Total worldwide cases",
  xlab = "Date",
  ylab = "Total cases",
  width = 750
) %>%
  dySeries(
    "COVID_Day_confirmed_series", "Total cases",
    drawPoints = TRUE, pointSize = 3,
    color = rgb(53, 116, 199, maxColorValue = 255)
  ) %>%
  dySeries(
    "COVID_Day_deaths_series", "Total deaths",
    drawPoints = TRUE, pointSize = 3,
    color = rgb(189, 55, 48, maxColorValue = 255)
  ) %>%
  dySeries(
    "COVID_Day_recovered_series", "Total recovered",
    drawPoints = TRUE, pointSize = 3,
    color = rgb(69, 136, 51, maxColorValue = 255)
  ) %>%
  dyRangeSelector()
Total cases in the team members' countries:
# Confirmed cases per date for a single country.
#   data         : case table with Country.Region, Date2 and Confirmed columns
#   country_name : value(s) of Country.Region to keep
# Returns one row per Date2 with the summed count in World_confirmed.
country_daily_confirmed <- function(data, country_name) {
  data %>%
    # `!!` injects the argument's value so that a column that happened to be
    # called `country_name` could never shadow it inside filter().
    filter(Country.Region %in% !!country_name) %>%
    group_by(Date2) %>%
    summarise(World_confirmed = sum(Confirmed))
}

# Turn a per-date table produced by country_daily_confirmed() into a
# date-indexed xts series of its World_confirmed column.
as_confirmed_series <- function(daily) {
  xts(daily$World_confirmed, order.by = daily$Date2)
}

# Per-date confirmed totals for each team member's country.
COVID_2_Day_Lebanon <- country_daily_confirmed(COVID_2, "Lebanon")
COVID_2_Day_Chile <- country_daily_confirmed(COVID_2, "Chile")
COVID_2_Day_Colombia <- country_daily_confirmed(COVID_2, "Colombia")
COVID_2_Day_CostaRica <- country_daily_confirmed(COVID_2, "Costa Rica")

COVID_Day_series_Lebanon <- as_confirmed_series(COVID_2_Day_Lebanon)
COVID_Day_series_Chile <- as_confirmed_series(COVID_2_Day_Chile)
COVID_Day_series_Colombia <- as_confirmed_series(COVID_2_Day_Colombia)
COVID_Day_series_CostaRica <- as_confirmed_series(COVID_2_Day_CostaRica)

# Combine the four series; the dySeries() calls below address each column by
# the name of the variable passed to cbind(), so those names must not change.
Our_Countries <- cbind(
  COVID_Day_series_Lebanon,
  COVID_Day_series_Chile,
  COVID_Day_series_Colombia,
  COVID_Day_series_CostaRica
)

# Interactive chart with one line per country and a range selector.
dygraph(
  Our_Countries,
  main = "SARS-COV2-outbreak: Total cases by country",
  xlab = "Date",
  ylab = "Total cases",
  width = 750
) %>%
  dySeries(
    "COVID_Day_series_Lebanon", "Lebanon",
    drawPoints = TRUE, pointSize = 3,
    color = rgb(0, 0, 3 / 255)
  ) %>%
  dySeries(
    "COVID_Day_series_Chile", "Chile",
    drawPoints = TRUE, pointSize = 3,
    color = rgb(120 / 255, 28 / 255, 109 / 255)
  ) %>%
  dySeries(
    "COVID_Day_series_Colombia", "Colombia",
    drawPoints = TRUE, pointSize = 3,
    color = rgb(237 / 255, 105 / 255, 37 / 255)
  ) %>%
  dySeries(
    "COVID_Day_series_CostaRica", "Costa Rica",
    drawPoints = TRUE, pointSize = 3,
    color = rgb(204 / 255, 197 / 255, 126 / 255)
  ) %>%
  dyRangeSelector()
# 3D scatter of the latest snapshot: confirmed cases, deaths and recoveries on
# the three axes, one marker per row, with the country name shown on hover.
fig <- plot_ly(
  data = COVID_updated,
  x = ~Confirmed,
  y = ~Deaths,
  z = ~Recovered,
  width = 750
)
fig <- add_markers(
  fig,
  text = ~Country.Region,
  hoverinfo = "text",
  marker = list(color = rgb(189, 55, 48, maxColorValue = 255))
)
fig <- layout(
  fig,
  title = "Confirmed cases Vs. Deaths Vs. Recovered",
  scene = list(
    xaxis = list(title = "Confirmed"),
    yaxis = list(title = "Deaths"),
    zaxis = list(title = "Recovered")
  )
)
fig
# Human Development Index table; the file uses ";" as field separator and ","
# as decimal mark.
HDI <- read.csv(
  file = "Human Development Index (HDI)_2.csv",
  sep = ";",
  dec = ","
)

# Country-level totals (confirmed, deaths, recovered) on the most recent date.
COVID_Country <- COVID_2 %>%
  filter(Date2 == max(Date2)) %>%
  group_by(Country.Region) %>%
  summarise(
    Total_confirmed = sum(Confirmed),
    Total_deaths = sum(Deaths),
    Total_Recovered = sum(Recovered)
  )
Remove parenthesized text (and any whitespace before it):
# Join key for the HDI table: the country name with any "(...)" suffix and the
# whitespace before it stripped.
HDI$Country_2 <- gsub(
  pattern = "\\s*\\([^\\)]+\\)",
  replacement = "",
  x = as.character(HDI$Country)
)

# Rename the two countries to the spellings used as Country.Region in the
# join with the case data.
HDI$Country_2 <- replace(
  HDI$Country_2,
  HDI$Country_2 == "United States",
  "US"
)
HDI$Country_2 <- replace(
  HDI$Country_2,
  HDI$Country_2 == "Korea",
  "South Korea"
)
Population:
# World population table; the file uses ";" as field separator and "," as
# decimal mark.
Population <- read.csv(
  file = "World_population.csv",
  sep = ";",
  dec = ","
)
Remove everything from the first comma onward:
# Join key for the population table: the country name cut at the first comma.
Population$Country_Name_2 <- gsub(
  pattern = ",.*",
  replacement = "",
  x = as.character(Population$Country_Name)
)

# Override individual names (matched by name or by country code) with the
# spellings used as Country.Region in the join with the case data.
Population$Country_Name_2 <- replace(
  Population$Country_Name_2,
  Population$Country_Name_2 == "United States",
  "US"
)
Population$Country_Name_2 <- replace(
  Population$Country_Name_2,
  Population$Country_Code == "KOR",
  "South Korea"
)
Population$Country_Name_2 <- replace(
  Population$Country_Name_2,
  Population$Country_Code == "CZE",
  "Czechia"
)
Inner joins on the country name:
# Combine the country totals with the HDI and population tables, keeping only
# countries present in all three, then derive cases per million inhabitants
# and the percentage of confirmed cases that recovered. Rows without a 2018
# population figure are dropped at the end.
COVID_3 <- COVID_Country %>%
  inner_join(HDI, by = c("Country.Region" = "Country_2")) %>%
  inner_join(Population, by = c("Country.Region" = "Country_Name_2")) %>%
  select(
    Country.Region,
    Total_confirmed,
    Total_deaths,
    Total_Recovered,
    HDI_Rank_2018,
    Year_2018,
    Country_Code,
    Population_2018
  ) %>%
  mutate(
    Cases_million = (Total_confirmed / Population_2018) * 1000000,
    Recovered_percentage = (Total_Recovered / Total_confirmed) * 100
  ) %>%
  filter(!is.na(Population_2018))
Plot the Human Development Index (HDI) vs. the number of cases per million inhabitants (log-transformed), with point size showing the percentage of recovered cases:
# Bubble chart: natural log of cases per million on x, the Year_2018 column
# (labelled "HDI") on y, bubble size by recovered percentage. The `text`
# aesthetic carries the country name for the plotly tooltip.
plot <- ggplot(
  data = COVID_3,
  aes(
    x = log(Cases_million),
    y = Year_2018,
    size = Recovered_percentage,
    text = Country.Region
  )
)
plot <- plot +
  geom_point(
    color = "black",
    fill = rgb(237, 105, 37, maxColorValue = 255),
    shape = 21,
    alpha = 0.6
  ) +
  scale_size(range = c(3, 15), name = "Recovered \n percentage") +
  theme_minimal() +
  theme(legend.position = "bottom") +
  labs(
    title = "HDI Vs. logarithmus of COVID-19 cases by million inhabitants \n and proportion of recovered",
    x = "ln(Cases/1M population)",
    y = "HDI"
  )

# Interactive version; the tooltip shows only the country name.
ggplotly(plot, tooltip = c("text"), width = 750)
# Numeric columns for the correlation analysis: log cases per million,
# recovered percentage, death percentage and the Year_2018 column.
COVID_numeric_1 <- COVID_3 %>%
  transmute(
    Log_cases = log(Cases_million),
    Recovered_percentage,
    Death_percentage = (Total_deaths / Total_confirmed) * 100,
    Year_2018
  )

# Correlation matrix of those columns, drawn as numbers.
COVID_numeric_1 %>%
  cor() %>%
  corrplot(method = "number", tl.col = "black", tl.srt = 15)